import os
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats.kde import gaussian_kde
from numpy import linspace
import seaborn as sns
import gzip
import HTSeq
from functools import reduce
from matplotlib_venn import venn2
from matplotlib_venn import venn3
results_path = "/scicore/home/zavolan/gypas/projects/EXTERNAL/Evan_NMD/long_reads_alternative_analysis/results/comparison/"
# splicing
SE_path_dKD = os.path.join(results_path, "comparison_scr_4_5_6_dKD/rmats/SE.MATS.JCEC.txt")
SE_path_SMG6 = os.path.join(results_path, "comparison_scr_1_2_3_SMG6KD/rmats/SE.MATS.JCEC.txt")
SE_path_SMG7 = os.path.join(results_path, "comparison_scr_4_5_6_SMG7KD/rmats/SE.MATS.JCEC.txt")
SE_path_UPF1 = os.path.join(results_path, "comparison_scr_1_2_3_UPF1KD/rmats/SE.MATS.JCEC.txt")
SE_path_SMG6KD_SMG6rescue = os.path.join(results_path, "comparison_SMG6KD_SMG6rescue/rmats/SE.MATS.JCEC.txt")
SE_path_SMG7KD_SMG7rescue = os.path.join(results_path, "comparison_SMG7KD_SMG7rescue/rmats/SE.MATS.JCEC.txt")
SE_path_UPF1KD_UPF1rescue = os.path.join(results_path, "comparison_UPF1KD_UPF1rescue/rmats/SE.MATS.JCEC.txt")
SE_path_dKD_dKDSMG6rescue = os.path.join(results_path, "comparison_dKD_dKDSMG6rescue/rmats/SE.MATS.JCEC.txt")
SE_path_dKD_dKDSMG7rescue = os.path.join(results_path, "comparison_dKD_dKDSMG7rescue/rmats/SE.MATS.JCEC.txt")
def mats_prepare_events_scr_vs_KD(events_path):
"""Prepare events for rMATS"""
events = pd.read_csv(events_path, header=0, sep="\t")
# calculate mean expression levels
IncLevel1_mean = pd.DataFrame(events.IncLevel1.str.split(',',2).tolist(),
columns=['IncLevel1_1', 'IncLevel1_2', 'IncLevel1_3'])
IncLevel2_mean = pd.DataFrame(events.IncLevel2.str.split(',',2).tolist(),
columns=['IncLevel2_1', 'IncLevel2_2', 'IncLevel2_3'])
IncLevel1_mean['IncLevel1_1'] = pd.to_numeric(IncLevel1_mean['IncLevel1_1'], errors='coerce')
IncLevel1_mean['IncLevel1_2'] = pd.to_numeric(IncLevel1_mean['IncLevel1_2'], errors='coerce')
IncLevel1_mean['IncLevel1_3'] = pd.to_numeric(IncLevel1_mean['IncLevel1_3'], errors='coerce')
IncLevel2_mean['IncLevel2_1'] = pd.to_numeric(IncLevel2_mean['IncLevel2_1'], errors='coerce')
IncLevel2_mean['IncLevel2_2'] = pd.to_numeric(IncLevel2_mean['IncLevel2_2'], errors='coerce')
IncLevel2_mean['IncLevel2_3'] = pd.to_numeric(IncLevel2_mean['IncLevel2_3'], errors='coerce')
events['Mean PSI Scr'] = np.mean(IncLevel1_mean, axis=1)
events['Mean PSI KD'] = np.mean(IncLevel2_mean, axis=1)
events['Significant'] = 'No'
events.loc[events["FDR"]<0.05, 'Significant'] = 'FDR<0.05'
events.loc[((events["FDR"]<0.05) & (abs(events["IncLevelDifference"])>0.3)), 'Significant'] = 'FDR<0.05 & abs(IncLevelDifference)>0.3'
return events
def mats_prepare_events_KD_vs_rescue(events_path):
"""Prepare events for rMATS"""
events = pd.read_csv(events_path, header=0, sep="\t")
# calculate mean expression levels
IncLevel1_mean = pd.DataFrame(events.IncLevel1.str.split(',',2).tolist(),
columns=['IncLevel1_1', 'IncLevel1_2', 'IncLevel1_3'])
IncLevel2_mean = pd.DataFrame(events.IncLevel2.str.split(',',2).tolist(),
columns=['IncLevel2_1', 'IncLevel2_2', 'IncLevel2_3'])
IncLevel1_mean['IncLevel1_1'] = pd.to_numeric(IncLevel1_mean['IncLevel1_1'], errors='coerce')
IncLevel1_mean['IncLevel1_2'] = pd.to_numeric(IncLevel1_mean['IncLevel1_2'], errors='coerce')
IncLevel1_mean['IncLevel1_3'] = pd.to_numeric(IncLevel1_mean['IncLevel1_3'], errors='coerce')
IncLevel2_mean['IncLevel2_1'] = pd.to_numeric(IncLevel2_mean['IncLevel2_1'], errors='coerce')
IncLevel2_mean['IncLevel2_2'] = pd.to_numeric(IncLevel2_mean['IncLevel2_2'], errors='coerce')
IncLevel2_mean['IncLevel2_3'] = pd.to_numeric(IncLevel2_mean['IncLevel2_3'], errors='coerce')
events['Mean PSI KD'] = np.mean(IncLevel1_mean, axis=1)
events['Mean PSI rescue'] = np.mean(IncLevel2_mean, axis=1)
events['Significant'] = 'No'
events.loc[events["FDR"]<0.05, 'Significant'] = 'FDR<0.05'
events.loc[((events["FDR"]<0.05) & (abs(events["IncLevelDifference"])>0.3)), 'Significant'] = 'FDR<0.05 & abs(IncLevelDifference)>0.3'
return events
path = SE_path_dKD
SE_MATS_dKD = mats_prepare_events_scr_vs_KD(path)
SE_MATS_dKD_short = SE_MATS_dKD.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI KD", "FDR", "IncLevelDifference"]]
SE_MATS_dKD_short.columns = SE_MATS_dKD_short.columns + "_dKD"
SE_MATS_dKD_short.reset_index(inplace=True)
path = SE_path_SMG6
SE_MATS_SMG6 = mats_prepare_events_scr_vs_KD(path)
SE_MATS_SMG6_short = SE_MATS_SMG6.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI KD", "FDR", "IncLevelDifference"]]
SE_MATS_SMG6_short.columns = SE_MATS_SMG6_short.columns + "_SMG6"
SE_MATS_SMG6_short.reset_index(inplace=True)
path = SE_path_SMG7
SE_MATS_SMG7 = mats_prepare_events_scr_vs_KD(path)
SE_MATS_SMG7_short = SE_MATS_SMG7.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI KD", "FDR", "IncLevelDifference"]]
SE_MATS_SMG7_short.columns = SE_MATS_SMG7_short.columns + "_SMG7"
SE_MATS_SMG7_short.reset_index(inplace=True)
path = SE_path_UPF1
SE_MATS_UPF1 = mats_prepare_events_scr_vs_KD(path)
SE_MATS_UPF1_short = SE_MATS_UPF1.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI KD", "FDR", "IncLevelDifference"]]
SE_MATS_UPF1_short.columns = SE_MATS_UPF1_short.columns + "_UPF1"
SE_MATS_UPF1_short.reset_index(inplace=True)
path = SE_path_SMG6KD_SMG6rescue
SE_MATS_SMG6KD_SMG6rescue = mats_prepare_events_KD_vs_rescue(path)
SE_MATS_SMG6KD_SMG6rescue_short = SE_MATS_SMG6KD_SMG6rescue.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI rescue", "FDR", "IncLevelDifference"]]
SE_MATS_SMG6KD_SMG6rescue_short.columns = SE_MATS_SMG6KD_SMG6rescue_short.columns + "_SMG6rescue"
SE_MATS_SMG6KD_SMG6rescue_short.reset_index(inplace=True)
path = SE_path_SMG7KD_SMG7rescue
SE_MATS_SMG7KD_SMG7rescue = mats_prepare_events_KD_vs_rescue(path)
SE_MATS_SMG7KD_SMG7rescue_short = SE_MATS_SMG7KD_SMG7rescue.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI rescue", "FDR", "IncLevelDifference"]]
SE_MATS_SMG7KD_SMG7rescue_short.columns = SE_MATS_SMG7KD_SMG7rescue_short.columns + "_SMG7rescue"
SE_MATS_SMG7KD_SMG7rescue_short.reset_index(inplace=True)
path = SE_path_UPF1KD_UPF1rescue
SE_MATS_UPF1KD_UPF1rescue = mats_prepare_events_KD_vs_rescue(path)
SE_MATS_UPF1KD_UPF1rescue_short = SE_MATS_UPF1KD_UPF1rescue.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI rescue", "FDR", "IncLevelDifference"]]
SE_MATS_UPF1KD_UPF1rescue_short.columns = SE_MATS_UPF1KD_UPF1rescue_short.columns + "_UPF1rescue"
SE_MATS_UPF1KD_UPF1rescue_short.reset_index(inplace=True)
path = SE_path_dKD_dKDSMG6rescue
SE_MATS_dKD_dKDSMG6rescue = mats_prepare_events_KD_vs_rescue(path)
SE_MATS_dKD_dKDSMG6rescue_short = SE_MATS_dKD_dKDSMG6rescue.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI rescue", "FDR", "IncLevelDifference"]]
SE_MATS_dKD_dKDSMG6rescue_short.columns = SE_MATS_dKD_dKDSMG6rescue_short.columns + "_dKDSMG6rescue"
SE_MATS_dKD_dKDSMG6rescue_short.reset_index(inplace=True)
path = SE_path_dKD_dKDSMG7rescue
SE_MATS_dKD_dKDSMG7rescue = mats_prepare_events_KD_vs_rescue(path)
SE_MATS_dKD_dKDSMG7rescue_short = SE_MATS_dKD_dKDSMG7rescue.set_index(["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"])[["Mean PSI rescue", "FDR", "IncLevelDifference"]]
SE_MATS_dKD_dKDSMG7rescue_short.columns = SE_MATS_dKD_dKDSMG7rescue_short.columns + "_dKDSMG7rescue"
SE_MATS_dKD_dKDSMG7rescue_short.reset_index(inplace=True)
SE_MATS_dKD_short.head()
dfs = [SE_MATS_dKD_short,
SE_MATS_SMG6_short,
SE_MATS_SMG7_short,
SE_MATS_UPF1_short,
SE_MATS_SMG6KD_SMG6rescue_short,
SE_MATS_SMG7KD_SMG7rescue_short,
SE_MATS_UPF1KD_UPF1rescue_short,
SE_MATS_dKD_dKDSMG6rescue_short,
SE_MATS_dKD_dKDSMG7rescue_short
]
df_all = reduce(lambda left,right: pd.merge(left,right,on=["GeneID", "chr", "strand", "exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE"]), dfs)
df_all.head()
df_all["Significant"] = "No"
df_all.loc[(df_all["FDR_dKD"]<0.05) & (df_all["FDR_dKDSMG6rescue"]<0.05), "Significant"] = "NMD targets"
sns.set(font_scale=2)
ax = sns.lmplot(x='IncLevelDifference_dKD',
y='IncLevelDifference_dKDSMG6rescue',
hue='Significant',
data=df_all,
fit_reg=False,
legend=True,
palette=["gray", "red"],
hue_order = ["No", "NMD targets"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("deltaPSI for Skipping Exons (SE)")
df_all["Significant"] = "No"
df_all.loc[(df_all["FDR_dKD"]<0.05) & (df_all["FDR_dKDSMG7rescue"]<0.05), "Significant"] = "NMD targets"
sns.set(font_scale=2)
ax = sns.lmplot(x='IncLevelDifference_dKD',
y='IncLevelDifference_dKDSMG7rescue',
hue='Significant',
data=df_all,
fit_reg=False,
legend=True,
palette=["gray", "red"],
hue_order = ["No", "NMD targets"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("deltaPSI for Skipping Exons (SE)")
df_all.reset_index(inplace=True)
df_all["event_id"] = df_all["GeneID"] + "_" + \
df_all["chr"] + "_" + \
df_all["strand"] + "_" + \
df_all["exonStart_0base"].map(str) + "_" + \
df_all["exonEnd"].map(str) + "_" + \
df_all["upstreamES"].map(str) + "_" + \
df_all["upstreamEE"].map(str) + "_" + \
df_all["downstreamES"].map(str) + "_" + \
df_all["downstreamEE"].map(str)
NMD_targets_SMG7 = df_all[(df_all["FDR_dKD"]<0.05) & (df_all["FDR_dKDSMG7rescue"]<0.05)]["event_id"].tolist()
NMD_targets_SMG6 = df_all[(df_all["FDR_dKD"]<0.05) & (df_all["FDR_dKDSMG6rescue"]<0.05)]["event_id"].tolist()
len(NMD_targets_SMG7), len(NMD_targets_SMG6)
NMD_targets = list(set(NMD_targets_SMG7 + NMD_targets_SMG6))
len(NMD_targets)
NMD_targets_genes = []
NMD_targets_exons = []
for target in NMD_targets:
target_sp = target.strip().split("_")
NMD_targets_genes.append(target_sp[0])
NMD_targets_exons.append(target_sp[1] + ":" + str(target_sp[3]) + "-" + str(target_sp[4]) + ":" + target_sp[2])
NMD_targets_genes = list(set(NMD_targets_genes))
NMD_targets_exons = list(set(NMD_targets_exons))
len(NMD_targets_genes), len(NMD_targets_exons)
path = SE_path_dKD
SE_MATS_JCEC = mats_prepare_events_scr_vs_KD(path)
SE_MATS_JCEC["event_id"] = SE_MATS_JCEC["GeneID"] + "_" + \
SE_MATS_JCEC["chr"] + "_" + \
SE_MATS_JCEC["strand"] + "_" + \
SE_MATS_JCEC["exonStart_0base"].map(str) + "_" + \
SE_MATS_JCEC["exonEnd"].map(str) + "_" + \
SE_MATS_JCEC["upstreamES"].map(str) + "_" + \
SE_MATS_JCEC["upstreamEE"].map(str) + "_" + \
SE_MATS_JCEC["downstreamES"].map(str) + "_" + \
SE_MATS_JCEC["downstreamEE"].map(str)
SE_MATS_JCEC["Significant"] = "No"
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='Significant',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray"],
hue_order = ["No"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
SE_MATS_JCEC.loc[SE_MATS_JCEC["FDR"]<0.05, 'Significant'] = 'FDR<0.05'
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='Significant',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray", "black"],
hue_order = ["No", "FDR<0.05"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
SE_MATS_JCEC["MND_target"] = "No"
SE_MATS_JCEC.loc[SE_MATS_JCEC["event_id"].isin(NMD_targets), "MND_target"] = "Yes"
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='MND_target',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray", "black"],
hue_order = ["No", "Yes"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
SE_MATS_JCEC.loc[(SE_MATS_JCEC["FDR"]<0.05) & (SE_MATS_JCEC["MND_target"]=="Yes"), "Significant"] = "FDR<0.05 &\nNMD target"
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='Significant',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray", "black", "red"],
hue_order = ["No", "FDR<0.05", "FDR<0.05 &\nNMD target"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
barplot_stats = pd.DataFrame(SE_MATS_JCEC["Significant"].value_counts())
barplot_stats
bed_novel = pd.read_csv(
"/scicore/home/zavolan/gypas/projects/EXTERNAL/Evan_NMD/long_reads_alternative_analysis/results/comparison/comparison_scr_4_5_6_dKD/rmats_processed/SE.MATS.JCEC.novel.bed",
header=None,
sep="\t"
)
bed_novel.columns = ["chr", "exonStart_0base", "exonEnd", "N", "score", "strand"]
bed_novel["id"] = "chr" + bed_novel["chr"] + ":" + bed_novel["exonStart_0base"].astype(str) + "-" + bed_novel["exonEnd"].astype(str) + ":" + bed_novel["strand"].astype(str)
bed_novel_ids = bed_novel["id"].tolist()
len(bed_novel_ids)
bed_novel_ids = list(set(bed_novel_ids))
len(bed_novel_ids)
path = SE_path_dKD
SE_MATS_JCEC = mats_prepare_events_scr_vs_KD(path)
SE_MATS_JCEC["event_id"] = SE_MATS_JCEC["GeneID"] + "_" + \
SE_MATS_JCEC["chr"] + "_" + \
SE_MATS_JCEC["strand"] + "_" + \
SE_MATS_JCEC["exonStart_0base"].map(str) + "_" + \
SE_MATS_JCEC["exonEnd"].map(str) + "_" + \
SE_MATS_JCEC["upstreamES"].map(str) + "_" + \
SE_MATS_JCEC["upstreamEE"].map(str) + "_" + \
SE_MATS_JCEC["downstreamES"].map(str) + "_" + \
SE_MATS_JCEC["downstreamEE"].map(str)
SE_MATS_JCEC["Significant"] = "No"
SE_MATS_JCEC.loc[SE_MATS_JCEC["FDR"]<0.05, 'Significant'] = 'FDR<0.05'
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='Significant',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray", "black"],
hue_order = ["No", "FDR<0.05"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
SE_MATS_JCEC["exon"] = SE_MATS_JCEC["chr"] + ":" + SE_MATS_JCEC["exonStart_0base"].map(str) + "-" + SE_MATS_JCEC["exonEnd"].map(str) + ":" + SE_MATS_JCEC["strand"]
SE_MATS_JCEC.loc[(SE_MATS_JCEC["exon"].isin(bed_novel_ids)) & (SE_MATS_JCEC["FDR"]<0.05), "Significant"] = "FDR<0.05 &\nnovel exon"
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='Significant',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray", "black", "red"],
hue_order = ["No", "FDR<0.05", "FDR<0.05 &\nnovel exon"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
SE_MATS_JCEC.loc[(SE_MATS_JCEC["exon"].isin(bed_novel_ids)) & \
(SE_MATS_JCEC["FDR"]<0.05) & \
(SE_MATS_JCEC["exon"].isin(NMD_targets_exons)), "Significant"] = "FDR<0.05 &\nnovel exon &\nNMD target"
sns.set(font_scale=2)
ax = sns.lmplot(x='Mean PSI Scr',
y='Mean PSI KD',
hue='Significant',
data=SE_MATS_JCEC,
fit_reg=False,
legend=True,
palette=["gray", "black", "red", "yellow"],
hue_order = ["No", "FDR<0.05", "FDR<0.05 &\nnovel exon", "FDR<0.05 &\nnovel exon &\nNMD target"],
height=15,
aspect=1
)
ax = plt.gca()
ax.set_title("Mean PSI for Skipping Exons (SE)")
barplot_stats = pd.DataFrame(SE_MATS_JCEC["Significant"].value_counts())
barplot_stats
path = SE_path_dKD
SE_MATS_JCEC = mats_prepare_events_scr_vs_KD(path)
gene_ids_se = list(set(SE_MATS_JCEC[(SE_MATS_JCEC["FDR"] < 0.05)]["GeneID"].tolist()))
# gene expression
DE_path = os.path.join(results_path, "comparison_scr_4_5_6_dKD/DE_edgeR/final_table.tsv")
DE = pd.read_csv(DE_path, sep="\t", header=0)
DE["Significant"] = "No"
DE.loc[DE["FDR"]<0.05, "Significant"] = "FDR<0.05"
DE.head()
DE.Significant.value_counts()
sns.set(font_scale=2)
ax = sns.lmplot(x='logCPM',
y='logFC',
hue='Significant',
data=DE,
fit_reg=False,
legend=True,
palette=["gray", "black"],
hue_order = ["No", "FDR<0.05"],
height=15,
aspect=1
)
DE['Significant merged'] = 'No'
DE.loc[DE["FDR"]<0.05, "Significant merged"] = "FDR<0.05"
DE.loc[(DE["id"].isin(gene_ids_se)) & (DE["Significant"]=="No"), 'Significant merged'] = 'Splicing event, no gene change'
DE.loc[(DE["id"].isin(gene_ids_se)) & (DE["Significant"]=="FDR<0.05"), 'Significant merged'] = 'Splicing event, gene change'
sns.set(font_scale=2)
ax = sns.lmplot(x='logCPM',
y='logFC',
hue='Significant merged',
data=DE,
fit_reg=False,
legend=True,
palette=["gray", "black", "yellow", "red"],
hue_order = ["No", "FDR<0.05",
"Splicing event, no gene change",
"Splicing event, gene change"],
height=15,
aspect=1
)
barplot_stats = pd.DataFrame(DE["Significant merged"].value_counts())
barplot_stats
path = SE_path_dKD
SE_MATS_JCEC = mats_prepare_events_scr_vs_KD(path)
gene_ids_se = list(set(SE_MATS_JCEC[(SE_MATS_JCEC["FDR"] < 0.05)]["GeneID"].tolist()))
DE = pd.read_csv(DE_path, sep="\t", header=0)
DE["Significant"] = "No"
DE.loc[DE["FDR"]<0.05, "Significant"] = "FDR<0.05"
DE.head()
DE["NMD_target"] = "No"
DE.loc[DE["id"].isin(NMD_targets_genes), "NMD_target"] = "Yes"
DE["Significant merged"] = "No"
DE.loc[DE["FDR"]<0.05, "Significant merged"] = "FDR<0.05"
DE.loc[(DE["id"].isin(NMD_targets_genes)) & (DE["Significant"]=="No"), 'Significant merged'] = 'NMD target, no gene change'
DE.loc[(DE["id"].isin(NMD_targets_genes)) & (DE["Significant"]=="FDR<0.05"), 'Significant merged'] = 'NMD target, gene change'
sns.set(font_scale=2)
ax = sns.lmplot(x='logCPM',
y='logFC',
hue='Significant merged',
data=DE,
fit_reg=False,
legend=True,
palette=["gray", "black", "yellow", "red"],
hue_order = ["No", "FDR<0.05",
"NMD target, no gene change",
"NMD target, gene change"],
height=15,
aspect=1
)
barplot_stats = pd.DataFrame(DE["Significant merged"].value_counts())
barplot_stats